 /*******************************************************************************
  * Copyright (c) 2006, 2007 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  * IBM Corporation - initial API and implementation
  *******************************************************************************/
 package org.eclipse.osgi.util;

 import java.util.Locale ;

 /**
  * Utility for displaying strings that carry structural meaning (file paths,
  * URLs, qualified names, ...) in right-to-left locales without letting the
  * Unicode BiDi algorithm reorder their structure.
  * <p>
  * The input is split into segments at a caller supplied set of delimiter
  * characters. Directional control characters are then injected so that every
  * segment is still laid out by the BiDi algorithm on its own, while the
  * string as a whole keeps a left-to-right ordering of its segments.
  * </p>
  * <p>
  * Processing is only active when the default locale language is one of
  * <code>iw</code>, <code>he</code>, <code>ar</code>, <code>fa</code> or
  * <code>ur</code> and the <code>os.name</code> system property starts with
  * <code>windows</code> or <code>linux</code> (case-insensitively). Both
  * conditions are evaluated once, when this class is initialized. In every
  * other environment the methods return their argument unchanged.
  * </p>
  *
  * @since 3.2
  */
 public class TextProcessor {

     // ---- individual delimiter characters --------------------------------

     /**
      * Dot (.) delimiter, typically found in package names and file extensions.
      */
     private static final String DOT = "."; //$NON-NLS-1$

     /**
      * Colon (:) delimiter, typically found in file paths and URLs.
      */
     private static final String COLON = ":"; //$NON-NLS-1$

     /**
      * Forward slash (/) delimiter, typically found in file paths and URLs.
      */
     private static final String FILE_SEP_FSLASH = "/"; //$NON-NLS-1$

     /**
      * Backslash (\) delimiter, typically found in file paths.
      */
     private static final String FILE_SEP_BSLASH = "\\"; //$NON-NLS-1$

     /**
      * Delimiter set used when the caller does not supply one.
      */
     private static final String delimiterString = DOT + COLON + FILE_SEP_FSLASH + FILE_SEP_BSLASH;

     // ---- directional control characters ---------------------------------

     /** Left-to-right mark (U+200E). */
     private static final char LRM = '\u200e';

     /** Left-to-right embedding (U+202A). */
     private static final char LRE = '\u202a';

     /** Pop directional formatting (U+202C). */
     private static final char PDF = '\u202c';

     /**
      * Sentinel stored in the "pending delimiter" slot while no delimiter is
      * waiting for a possible LRM in front of it.
      */
     private static final int INDEX_NOT_SET = 999999999;

     // ---- environment flags, evaluated once at class initialization ------

     /** <code>true</code> when the default locale uses a BiDi language. */
     private static final boolean isBidi = detectBidiLocale();

     /** <code>true</code> when the platform is one of the supported ones. */
     private static final boolean isSupportedPlatform = detectSupportedPlatform();

     /*
      * Prevent instantiation; the class only offers static helpers.
      */
     private TextProcessor() {
         // static utility
     }

     /*
      * Report whether the language of the default locale is one of the BiDi
      * languages this class knows about.
      */
     private static boolean detectBidiLocale() {
         final String language = Locale.getDefault().getLanguage();
         final String[] bidiLanguages = {"iw", "he", "ar", "fa", "ur"}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
         for (int k = 0; k < bidiLanguages.length; k++) {
             if (bidiLanguages[k].equals(language)) {
                 return true;
             }
         }
         return false;
     }

     /*
      * Report whether the operating system name identifies a platform on
      * which the directional control characters are used.
      */
     private static boolean detectSupportedPlatform() {
         final String platform = System.getProperty("os.name").toLowerCase(); //$NON-NLS-1$
         return platform.startsWith("windows") || platform.startsWith("linux"); //$NON-NLS-1$ //$NON-NLS-2$
     }

     /**
      * Return the characters that are used as segment delimiters when no
      * explicit delimiter set is given.
      *
      * @return delimiter string
      */
     public static String getDefaultDelimiters() {
         return delimiterString;
     }

     /**
      * Process the given text using the default delimiter set. Equivalent to
      * <code>process(text, getDefaultDelimiters())</code>.
      *
      * @param text
      * the text to be processed
      * @return the manipulated string
      * @see #process(String, String)
      * @see #getDefaultDelimiters()
      */
     public static String process(String text) {
         return process(text, getDefaultDelimiters());
     }

     /**
      * Prepare a structured string for display in a BiDi locale. The string is
      * cut into segments at the given delimiters; each segment is still
      * subject to the Unicode BiDi algorithm, but the segments themselves keep
      * their left-to-right order.
      * <p>
      * For example the path <tt>d:\myFolder\FOLDER\MYFILE.java</tt> (capital
      * letters standing for RTL text) is meant to be displayed as
      * <tt>d:\myFolder\REDLOF\ELIFYM.java</tt>.
      * </p>
      * <p>
      * The steps are:
      * <ol>
      * <li>Walk the string and remember the direction of the most recent
      * strong character (a letter; RTL letters are recognized by
      * code point range).</li>
      * <li>When a delimiter follows an RTL letter, remember its position as a
      * candidate for an LRM. Only the most recent candidate is kept.</li>
      * <li>If the next digit or RTL letter arrives while a candidate is
      * pending, insert an LRM in front of that delimiter. If an LTR letter
      * arrives first, the candidate is dropped because no mark is needed.</li>
      * <li>Wrap the result in LRE ... PDF, unless the string contains no RTL
      * letter, starts with a letter and ends with a letter or a digit; in that
      * case the original string is returned untouched.</li>
      * </ol>
      * </p>
      * <p>
      * NOTE: the returned string may contain additional control characters, so
      * <code>String.equals(String)</code> and <code>String.length()</code>
      * will not give the same results as for the original string.
      * </p>
      *
      * @param str
      * the text to process; <code>null</code>, strings of at most one
      * character and strings that already start with LRE and end with
      * PDF are returned as they were passed in
      * @param delimiter
      * delimiters by which the string will be segmented, if
      * <code>null</code> the default delimiters are used
      * @return the processed string
      */
     public static String process(String str, String delimiter) {
         // nothing to do for trivial input
         if (str == null || str.length() <= 1) {
             return str;
         }
         // nothing to do outside a supported BiDi environment
         if (!isSupportedPlatform || !isBidi) {
             return str;
         }

         final int length = str.length();
         final char first = str.charAt(0);
         final char last = str.charAt(length - 1);

         // already wrapped by an earlier call, leave it alone
         if (first == LRE && last == PDF) {
             return str;
         }

         final String separators = (delimiter != null) ? delimiter : getDefaultDelimiters();

         // true once any RTL letter has been seen
         boolean sawRtlLetter = false;
         // true while the most recent strong character is an RTL letter
         boolean lastStrongIsRtl = false;
         // buffer offset of the delimiter that may still need an LRM
         int pendingMark = INDEX_NOT_SET;

         final StringBuffer out = new StringBuffer();
         out.append(LRE);

         for (int pos = 0; pos < length; pos++) {
             final char current = str.charAt(pos);

             if (separators.indexOf(current) >= 0) {
                 // delimiter after RTL text: remember where an LRM would go
                 if (lastStrongIsRtl) {
                     pendingMark = out.length();
                 }
             } else if (Character.isDigit(current)) {
                 // digit following the pending delimiter: force LTR with an
                 // LRM, which then counts as the last strong character
                 if (pendingMark != INDEX_NOT_SET) {
                     out.insert(pendingMark, LRM);
                     pendingMark = INDEX_NOT_SET;
                     lastStrongIsRtl = false;
                 }
             } else if (Character.isLetter(current)) {
                 final boolean rtl = isRTL(current);
                 if (rtl) {
                     sawRtlLetter = true;
                     // RTL on both sides of the delimiter: pin it with an LRM
                     if (pendingMark != INDEX_NOT_SET) {
                         out.insert(pendingMark, LRM);
                     }
                 }
                 // any letter settles the pending delimiter one way or another
                 pendingMark = INDEX_NOT_SET;
                 lastStrongIsRtl = rtl;
             }
             // all other characters are copied without affecting the state

             out.append(current);
         }

         /*
          * The orientation of the widget that will show the string is not
          * known here. The LRE/PDF wrapper can only be skipped when the
          * string has no RTL letter, starts with a letter and ends with a
          * letter or digit; in every other case the wrapper is kept.
          */
         final boolean plainLeftToRight = !sawRtlLetter
                 && Character.isLetter(first)
                 && !isNeutral(last);
         if (plainLeftToRight) {
             return str;
         }

         out.append(PDF);
         return out.toString();
     }

     /**
      * Strip the directional control characters (LRE, PDF and LRM) from the
      * given string, undoing what <code>process(String)</code> or
      * <code>process(String, String)</code> added. Every occurrence of these
      * three characters is removed. Like the process methods, this is a no-op
      * outside a supported BiDi environment and for <code>null</code> or
      * strings of at most one character.
      *
      * @param str string with directional markers to remove
      * @return string with no directional markers
      * @see #process(String)
      * @see #process(String, String)
      * @since 3.3
      */
     public static String deprocess(String str) {
         final boolean trivial = (str == null) || (str.length() <= 1);
         if (trivial || !isSupportedPlatform || !isBidi) {
             return str;
         }

         final StringBuffer stripped = new StringBuffer();
         for (int pos = 0; pos < str.length(); pos++) {
             final char current = str.charAt(pos);
             final boolean isMarker = (current == LRE) || (current == PDF) || (current == LRM);
             if (!isMarker) {
                 stripped.append(current);
             }
         }
         return stripped.toString();
     }

     /*
      * Return whether or not the character is right-to-left oriented.
      *
      * Character.getDirectionality() is deliberately not used, because the
      * OSGi library can be compiled against execution environments that
      * pre-date that API. Two code point ranges are tested instead: the
      * Hebrew and Arabic characters, and the Hebrew and Arabic presentation
      * forms. Farsi and Urdu are written in Arabic script and so fall within
      * the same ranges.
      */
     private static boolean isRTL(char c) {
         final boolean inBaseRange = (c >= 0x05d0) && (c <= 0x07b1);
         final boolean inPresentationForms = (c >= 0xfb1d) && (c <= 0xfefc);
         return inBaseRange || inPresentationForms;
     }

     /*
      * Return whether or not the given character is neither a digit nor a
      * letter.
      */
     private static boolean isNeutral(char c) {
         return !Character.isDigit(c) && !Character.isLetter(c);
     }
 }

